This is an analysis for the impact of introducing the lunch program in 1955 the growth trend of the Japanese Boys
The data source is Ministry of Internal Affairs and Communications Statistics Bureau.
www.e-stat.go.jp/SG1/estat/List.do?bid=000001014499&
NOTE: each column represent an age group
data <- read_xlsx("FEH_00400002_221209195404.xlsx")
data[data == "***"] <- NA
data <- clean_names(data)
datatable(data)
Changing the data to long format
data <- data %>% select(year, x17) %>% rename(height = x17) %>% mutate(height = as.numeric(height)) %>% mutate(year = year - 1900)
datatable(data)
max_height <- 175
data$height_scaled <- data$height / max_height
data$logit_height <- log(data$height_scaled / (1 - data$height_scaled))
model_all <- lm(logit_height ~ year, data = data)
data$predicted_all <- predict(model_all, newdata = data)
data$predicted_all <- exp(data$predicted_all)/(1+exp(data$predicted_all))*max_height
datatable(data)
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Predicted Data"), values = c("blue", "red")) + geom_line() + geom_line(aes(y = predicted_all, colour = "Predicted Data"))
war_year <- 1940 - 1900
end_war_year <- 1948 - 1900
data_pre_war <- data %>% filter(year <= war_year)
fit_pre_war <- lm(logit_height ~ year, data = data_pre_war)
data$predicted_pre_war <- predict(fit_pre_war, newdata = data)
data$predicted_pre_war <- exp(data$predicted_pre_war)/(1+exp(data$predicted_pre_war))*max_height
datatable(data)
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Predicted Data Full Model", "Pre War Model"), values = c("blue", "red", "black")) + geom_line() + geom_line(aes(y = predicted_all, colour = "Predicted Data")) + geom_line(aes(y = predicted_pre_war, colour = "Pre War Projection"))
data_post_war <- data %>% filter(year >= end_war_year)
fit_post_war <- lm(logit_height ~ I(year)^2 + year, data = data_post_war)
data$predicted_post_war <- predict(fit_post_war, newdata = data)
## Warning in predict.lm(fit_post_war, newdata = data): prediction from a
## rank-deficient fit may be misleading
data$predicted_post_war <- exp(data$predicted_post_war)/(1+exp(data$predicted_post_war))*max_height
datatable(data)
data_post_war <- data %>% filter(year >= end_war_year)
fit_post_war_direct <- lm(logit_height ~ I(year)^2, data = data_post_war)
data$predicted_post_war_direct <- predict(fit_post_war_direct, newdata = data)
data$predicted_post_war_direct <- exp(data$predicted_post_war_direct)/(1+exp(data$predicted_post_war_direct))*max_height
datatable(data)
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Direct Fit", "Poly"), values = c("blue", "red", "black")) +
geom_line() +
geom_line(aes(y = predicted_post_war_direct, colour = "Direct Fit")) +
geom_line(aes(y = predicted_post_war, colour = "Poly"))
full_plot <- ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Predicted Data Full Model", "Pre War Model", "Post War Model Poly", "Post War Direct"), values = c("blue", "red", "black", "green", "grey")) +
geom_line() +
geom_line(aes(y = predicted_all, colour = "Predicted Data")) +
geom_line(aes(y = predicted_pre_war, colour = "Pre War Projection")) +
geom_line(aes(y = predicted_post_war, colour = "Post War Model Poly")) +
geom_line(aes(y = predicted_post_war, colour = "Post War Direct"))
ggplotly(full_plot)